Project_V1

Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(fmsb))

data = read_excel("Project_1_Data.xlsx", sheet = "pooled123")

filteredData = select(data, Date, PID, BSSQ_1:BSSQ_16, ASSQ_1:ASSQ_16, age, VRexperience, ssq_full, gender, sm)
#we only want to look at those that did not undergo social modelling conditions
filteredData = subset(filteredData, sm == "NO_SM")

#calculating differences between baseline and active SSQ for each symptom
filteredData = mutate(filteredData, d_discomfort = ASSQ_1 - BSSQ_1)
filteredData = mutate(filteredData, d_fatigue = ASSQ_2 - BSSQ_2)
filteredData = mutate(filteredData, d_headache = ASSQ_3 - BSSQ_3)
filteredData = mutate(filteredData, d_eyestrain = ASSQ_4 - BSSQ_4)
filteredData = mutate(filteredData, d_difficulty_focusing = ASSQ_5 - BSSQ_5)
filteredData = mutate(filteredData, d_salivation = ASSQ_6 - BSSQ_6)
filteredData = mutate(filteredData, d_sweating = ASSQ_7 - BSSQ_7)
filteredData = mutate(filteredData, d_nausea = ASSQ_8 - BSSQ_8)
filteredData = mutate(filteredData, d_difficulty_concentrating = ASSQ_9 - BSSQ_9)
filteredData = mutate(filteredData, d_fullness_of_head = ASSQ_10 - BSSQ_10)
filteredData = mutate(filteredData, d_blurred_vision = ASSQ_11 - BSSQ_11)
filteredData = mutate(filteredData, d_dizziness_o = ASSQ_12 - BSSQ_12)
filteredData = mutate(filteredData, d_dizziness_c = ASSQ_13 - BSSQ_13)
filteredData = mutate(filteredData, d_vertigo = ASSQ_14 - BSSQ_14)
filteredData = mutate(filteredData, d_stomach_awareness = ASSQ_15 - BSSQ_15)
filteredData = mutate(filteredData, d_burping = ASSQ_16 - BSSQ_16)


#reclasss VR experience as factor (was chr)
filteredData$VRexperience = as.factor(filteredData$VRexperience)

#we want to filter this data even further and split it into age groups
#once in age groups, calculate the mean change for each of the age groups for each symptom
filteredData = mutate(filteredData, age_group = case_when(
  age >= 16 & age <= 21 ~ "16 to 21",
  age >= 22 & age <= 29 ~ "22 to 29",
  age >= 30 & age <= 37 ~ "30 to 37",
  age >= 38 & age <= 45 ~ "38 to 45",
  age > 45 ~ "above 45"
))

#convert the age groups into factors
filteredData$age_group = as.factor(filteredData$age_group)

#renaming columns to be more informative
names(filteredData)[names(filteredData) == 'BSSQ_1'] <- 'BSSQ_discomfort'
names(filteredData)[names(filteredData) == 'BSSQ_2'] <- 'BSSQ_fatigue'
names(filteredData)[names(filteredData) == 'BSSQ_3'] <- 'BSSQ_headache'
names(filteredData)[names(filteredData) == 'BSSQ_4'] <- 'BSSQ_eyestrain'
names(filteredData)[names(filteredData) == 'BSSQ_5'] <- 'BSSQ_difficulty_focusing'
names(filteredData)[names(filteredData) == 'BSSQ_6'] <- 'BSSQ_salivation'
names(filteredData)[names(filteredData) == 'BSSQ_7'] <- 'BSSQ_sweating'
names(filteredData)[names(filteredData) == 'BSSQ_8'] <- 'BSSQ_nausea'
names(filteredData)[names(filteredData) == 'BSSQ_9'] <- 'BSSQ_difficulty_concentrating'
names(filteredData)[names(filteredData) == 'BSSQ_10'] <- 'BSSQ_fullness_of_head'
names(filteredData)[names(filteredData) == 'BSSQ_11'] <- 'BSSQ_blurred_vision'
names(filteredData)[names(filteredData) == 'BSSQ_12'] <- 'BSSQ_dizziness_o'
names(filteredData)[names(filteredData) == 'BSSQ_13'] <- 'BSSQ_dizziness_c'
names(filteredData)[names(filteredData) == 'BSSQ_14'] <- 'BSSQ_vertigo'
names(filteredData)[names(filteredData) == 'BSSQ_15'] <- 'BSSQ_stomach_awareness'
names(filteredData)[names(filteredData) == 'BSSQ_16'] <- 'BSSQ_burping'





names(filteredData)[names(filteredData) == 'ASSQ_1'] <- 'ASSQ_discomfort'
names(filteredData)[names(filteredData) == 'ASSQ_2'] <- 'ASSQ_fatigue'
names(filteredData)[names(filteredData) == 'ASSQ_3'] <- 'ASSQ_headache'
names(filteredData)[names(filteredData) == 'ASSQ_4'] <- 'ASSQ_eyestrain'
names(filteredData)[names(filteredData) == 'ASSQ_5'] <- 'ASSQ_difficulty_focusing'
names(filteredData)[names(filteredData) == 'ASSQ_6'] <- 'ASSQ_salivation'
names(filteredData)[names(filteredData) == 'ASSQ_7'] <- 'ASSQ_sweating'
names(filteredData)[names(filteredData) == 'ASSQ_8'] <- 'ASSQ_nausea'
names(filteredData)[names(filteredData) == 'ASSQ_9'] <- 'ASSQ_difficulty_concentrating'
names(filteredData)[names(filteredData) == 'ASSQ_10'] <- 'ASSQ_fullness_of_head'
names(filteredData)[names(filteredData) == 'ASSQ_11'] <- 'ASSQ_blurred_vision'
names(filteredData)[names(filteredData) == 'ASSQ_12'] <- 'ASSQ_dizziness_o'
names(filteredData)[names(filteredData) == 'ASSQ_13'] <- 'ASSQ_dizziness_c'
names(filteredData)[names(filteredData) == 'ASSQ_14'] <- 'ASSQ_vertigo'
names(filteredData)[names(filteredData) == 'ASSQ_15'] <- 'ASSQ_stomach_awareness'
names(filteredData)[names(filteredData) == 'ASSQ_16'] <- 'ASSQ_burping'


#isolating the data dictionary
data_dict = read_excel("Project_1_Data.xlsx", sheet = "data_dictionary")

#filter into groups with experience or not
withVRexperience = filter(filteredData, VRexperience == 'Yes')
noVRexperience = filter(filteredData, VRexperience == 'No')

#taking averages for with/without experience
yes_avg_d_discomfort = mean(withVRexperience$d_discomfort)
yes_avg_d_fatigue = mean(withVRexperience$d_fatigue)
yes_avg_d_headache = mean(withVRexperience$d_headache)
yes_avg_d_eyestrain = mean(withVRexperience$d_eyestrain)
yes_avg_d_difficulty_focusing = mean(withVRexperience$d_difficulty_focusing)
yes_avg_d_salivation = mean(withVRexperience$d_salivation)
yes_avg_d_sweating = mean(withVRexperience$d_sweating)
yes_avg_d_nausea = mean(withVRexperience$d_nausea)
yes_avg_d_difficulty_concentrating = mean(withVRexperience$d_difficulty_concentrating)
yes_avg_d_fullness = mean(withVRexperience$d_fullness_of_head)
yes_avg_d_vision = mean(withVRexperience$d_blurred_vision)
yes_avg_d_dizziness_o = mean(withVRexperience$d_dizziness_o)
yes_avg_d_dizziness_c = mean(withVRexperience$d_dizziness_c)
yes_avg_d_vertigo = mean(withVRexperience$d_vertigo)
yes_avg_d_stomach = mean(withVRexperience$d_stomach_awareness)
yes_avg_d_burping = mean(withVRexperience$d_burping)


no_avg_d_discomfort = mean(noVRexperience$d_discomfort)
no_avg_d_fatigue = mean(noVRexperience$d_fatigue)
no_avg_d_headache = mean(noVRexperience$d_headache)
no_avg_d_eyestrain = mean(noVRexperience$d_eyestrain)
no_avg_d_difficulty_focusing = mean(noVRexperience$d_difficulty_focusing)
no_avg_d_salivation = mean(noVRexperience$d_salivation)
no_avg_d_sweating = mean(noVRexperience$d_sweating)
no_avg_d_nausea = mean(noVRexperience$d_nausea)
no_avg_d_difficulty_concentrating = mean(noVRexperience$d_difficulty_concentrating)
no_avg_d_fullness = mean(noVRexperience$d_fullness_of_head)
no_avg_d_vision = mean(noVRexperience$d_blurred_vision)
no_avg_d_dizziness_o = mean(noVRexperience$d_dizziness_o)
no_avg_d_dizziness_c = mean(noVRexperience$d_dizziness_c)
no_avg_d_vertigo = mean(noVRexperience$d_vertigo)
no_avg_d_stomach = mean(noVRexperience$d_stomach_awareness)
no_avg_d_burping = mean(noVRexperience$d_burping)


#taking averages for different age groups
grp1 = filter(filteredData, age_group == '16 to 21')
grp2 = filter(filteredData, age_group == '22 to 29')
grp3 = filter(filteredData, age_group == '30 to 37')
grp4 = filter(filteredData, age_group == '38 to 45')
grp5 = filter(filteredData, age_group == 'above 45')

mean_sqq_ages = c(mean(grp1$ssq_full),
                  mean(grp2$ssq_full),
                  mean(grp3$ssq_full),
                  mean(grp4$ssq_full),
                  mean(grp5$ssq_full)
                  )

data_by_age_group = data.frame(
  age_group = c("16 to 21", '22 to 29','30 to 37','38 to 45','Above 45'),
  mean_ssq = mean_sqq_ages
  
)

filteredData = mutate(filteredData, "generation" = case_when(year(Date) - age >= 2010 ~ "a", year(Date) - age >= 1997 ~ "Z", year(Date) - age >= 1981 ~ "Y", year(Date) - age >= 1965 ~ "X", TRUE ~ "Old"))

Summary of Findings

(executive summary goes here)

Initial Data Analysis (IDA)

Source

Our data was sourced from Cosette Saunder’s paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”.

Structure

The paper included 336 participants, each with 51 variables recorded. Our project focused the following variables:

  • Baseline SSQ (BSSQ), Active SSQ (ASSQ), and SSQ_Fullof 16 symptoms (quantitative, discrete): self-reported symptom severity before and after undergoing VR respectively, on a scale of 1 to 10.

  • Participants’ VRexperience (qualitative, nominal); sorted by experience to see how symptoms differed.

  • age of participants (quantitative, discrete); they were then sorted into age groups.

  • The change (\(\Delta\)) between BSSQ and ASSQ was calculated for each participant for each symptom, and we took the average \(\Delta\) for each symptom for each group (VRexperience and age_group) (quantitative, discrete).

Code
data_age_groups = select(filteredData, age_group)
groups_counted = data_age_groups %>% count(age_group)

age_pie = plot_ly(groups_counted, labels = ~age_group, values = ~n,
                 type = 'pie', direction = 'clockwise', sort = FALSE, rotation = 30)
age_pie <- age_pie %>% layout(title = 'Distribution of ages',
                            showlegend = TRUE)
age_pie
Code
data_experience = select(filteredData, VRexperience)
exp_counted = data_experience %>% count(VRexperience)

vr_pie = plot_ly(exp_counted, labels = ~VRexperience, values = ~n,
                 type = 'pie')
vr_pie <- vr_pie %>% layout(title = 'Distribution of VR experience',
                            showlegend = TRUE)

vr_pie

Limitations

  • Self-reporting bias for both BSSQ and ASSQ which makes the values prone to being over or underestimates by each participant.

  • VRexperience is a binary qualitative classification, it is not descriptive of the nature, amount or frequency, and that those with more than 10 VR experiences were excluded.

Research Question

What is the effect of having VR experience on the symptoms experienced by people?

Code
library(plotly)

yes_means = c(yes_avg_d_discomfort,
              yes_avg_d_fatigue,
              yes_avg_d_headache,
              yes_avg_d_eyestrain,
              yes_avg_d_sweating,
              yes_avg_d_nausea,
              yes_avg_d_dizziness_c,
              yes_avg_d_dizziness_o,
              yes_avg_d_difficulty_focusing,
              yes_avg_d_difficulty_concentrating,
              yes_avg_d_salivation,
              yes_avg_d_vision,
              yes_avg_d_vertigo,
              yes_avg_d_stomach,
              yes_avg_d_fullness,
              yes_avg_d_burping
              )

no_means = c(no_avg_d_discomfort,
             no_avg_d_fatigue,
             no_avg_d_headache,
             no_avg_d_eyestrain,
             no_avg_d_sweating,
             no_avg_d_nausea,
             no_avg_d_dizziness_c,
             no_avg_d_dizziness_o,
             no_avg_d_difficulty_focusing,
             no_avg_d_difficulty_concentrating,
             no_avg_d_salivation,
             no_avg_d_vision,
             no_avg_d_vertigo,
             no_avg_d_stomach,
             no_avg_d_fullness,
             no_avg_d_burping
             )

fig <- plot_ly(
  type  = 'scatterpolar',
  fill = 'toself',
  title = "Average change in BSSQ and ASSQ depending on VR experience for each symptom"
)

fig <- fig %>% 
  add_trace(
    r = yes_means,
    theta = c('Discomfort', 'Fatigue', 'Headache', 'Eyestrain', 'Sweating', 'Nausea', 'Dizziness (closed)', 'Dizziness (open)', 'Difficulty Focusing', 'Difficulty Concentrating', 'Salivation', 'Blurry Vision', 'Vertigo', 'Stomach Awareness', 'Fullness of head','Burping'),
    name = 'With VR Experience'
  )

fig <- fig %>% 
  add_trace(
    r = no_means,
    theta = c('Discomfort', 'Fatigue', 'Headache', 'Eyestrain', 'Sweating', 'Nausea','Dizziness (closed)', 'Dizziness (open)', 'Difficulty Focusing', 'Difficulty Concentrating', 'Salivation', 'Blurry Vision',  'Vertigo', 'Stomach Awareness', 'Fullness of head','Burping'),
    name = 'No VR Experience'
  )

fig <- fig %>%
  layout(
    polar = list(
      radialaxis = list(
        visible = T,
        range = c(-0.5,2)
      )
    ),
    showlegend = T
  )

fig

When took the average change between the BSSQ and ASSQ, and visualized it in the spiderchart. We can see that both groups experience similar \(\Delta\) (change) in most symptoms. Interestingly, the VR experienced group saw

This observation is in contrast to

We look further into those symptoms with the greatest differences in mean \(\Delta\) : Dizziness (Eyes closed) , Vertigo and Stomach Awareness.

Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = as.factor(gender) , y = ssq_full, group = VRexperience, fill = VRexperience)) +
  geom_boxplot(position = "dodge") +
  labs(x = "gender",  y = "Full SSQ")
plt

Code
ggplotly(plt) %>%  layout(boxmode = "group")
Warning: 'layout' objects don't have these attributes: 'boxmode'
Valid attributes include:
'_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = VRexperience, y = d_dizziness_c)) +
  geom_boxplot() +
  labs(x = "VR Experience",  y = "Change in BSSQ and ASSQ-Dizziness (Eyes Closed)")

ggplotly(plt)
Code
#fivenum(withVRexperience$d_dizziness_c)
#fivenum(noVRexperience$d_dizziness_c)
#
#mean(withVRexperience$d_dizziness_c)
#mean(noVRexperience$d_dizziness_c)
#
#sd(withVRexperience$d_dizziness_c)
#sd(noVRexperience$d_dizziness_c)


#since from the boxplots we can see that both sets have a fair number of outliers which could affect the mean, the median + IQR may be a better measure of spread
Code
library(tidyverse)
library(plotly)
library(gganimate)
library(gifski)

plt = ggplot(filteredData, aes(y = d_dizziness_c, x = age, colour = VRexperience)) +
  geom_point() +
  labs(x = "Age",  y = "Dizziness (Eyes Closed)", col = "VR Experience") +
  transition_states(VRexperience, transition_length = 1, state_length = 1) +
  enter_fade() + exit_fade()

#animate(plt, renderer = gifski_renderer())
ggplotly(plt)
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = age_group, y = d_dizziness_c)) +
  geom_boxplot() +
  labs(x = "Age Group",  y = "Change in BSSQ and ASSQ-Dizziness(Eyes Closed)")

ggplotly(plt)
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = VRexperience, y = d_vertigo)) +
  geom_boxplot() +
  labs(x = "VR Experience",  y = "Vertigo")

ggplotly(plt)
Code
#fivenum(withVRexperience$d_vertigo)
#fivenum(noVRexperience$d_vertigo)
#
#mean(withVRexperience$d_vertigo)
#mean(noVRexperience$d_vertigo)
#
#sd(withVRexperience$d_vertigo)
#sd(noVRexperience$d_vertigo)
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = age_group, y = d_vertigo)) +
  geom_boxplot() +
  labs(x = "Age Group",  y = "Change in BSSQ and ASSQ - Vertigo")

ggplotly(plt)
Code
library(tidyverse)
library(plotly)
library(gganimate)
library(gifski)

plt = ggplot(filteredData, aes(y = d_vertigo, x = age, colour = VRexperience)) +
  geom_point() +
  labs(x = "Age",  y = "Vertigo", col = "VR Experience") +
  transition_states(VRexperience, transition_length = 1, state_length = 1) +
  enter_fade() + exit_fade()

#animate(plt, renderer = gifski_renderer())
ggplotly(plt)
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = VRexperience, y = d_stomach_awareness)) +
  geom_boxplot() +
  labs(x = "VR Experience",  y = "Change in BSSQ and ASSQ - Stomach Awareness")

ggplotly(plt)
Code
library(tidyverse)
library(plotly)

plt = ggplot(filteredData, aes(x = age_group, y = d_stomach_awareness)) +
  geom_boxplot() +
  labs(x = "Age Group",  y = "Change in BSSQ and ASSQ - Stomach Awareness")

ggplotly(plt)
Code
library(tidyverse)
library(plotly)
library(gganimate)
library(gifski)

plt = ggplot(filteredData, aes(y = d_stomach_awareness, x = age, colour = VRexperience)) +
  geom_point() +
  labs(x = "Age",  y = "Stomach Awareness", col = "VR Experience") +
  transition_states(VRexperience, transition_length = 1, state_length = 1) +
  enter_fade() + exit_fade()

#animate(plt, renderer = gifski_renderer())
ggplotly(plt)
Code
library(plotly)
library(tidyselect)

mean_sqq_ages = c(mean(grp1$ssq_full),
                  mean(grp2$ssq_full),
                  mean(grp3$ssq_full),
                  mean(grp4$ssq_full),
                  mean(grp5$ssq_full)
                  )

fig2 <- plot_ly(
  type = 'scatterpolar',
  fill = 'toself',
  r = mean_sqq_ages,
  theta = c("16 to 21",
            "22 to 29",
            "30 to 37",
            "38 to 45",
            "above 45")
)
fig2 <- fig2 %>%
  layout(
    polar = list(
      radialaxis = list(
        visible = T,
        range = c(0,20)
      )
    ),
    showlegend = T
  )

fig2
No scatterpolar mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Code
fig3 = ggplot(data_by_age_group, aes(x = age_group, y = mean_ssq)) +
  geom_bar(stat = 'identity')



ggplotly(fig3)
Code
library(tidyverse)

p = ggplot(filteredData, aes(x = age, y = ssq_full, colour = VRexperience)) +
  geom_point()

p

Code
plt = ggplot(filteredData, aes(x = VRexperience, y = ssq_full)) +
  geom_boxplot()

ggplotly(plt)
Code
plt2 = ggplot(filteredData, aes(x = age_group, y = ssq_full)) +
  geom_boxplot()

ggplotly(plt2)
Code
library(tidyverse)

yex = filter(filteredData, VRexperience == "Yes")
nox = filter(filteredData, VRexperience == "No")

q1 = quantile(yex$age, probs = c(0.25), na.rm = TRUE) |> as.numeric()
q2 = quantile(nox$age, probs = c(0.25), na.rm = TRUE) |> as.numeric()
# https://forum.posit.co/t/make-the-r-function-quantile-return-a-numeric-value/178084
q3 = 2*q2-q1

ggplot(filteredData, aes(x = VRexperience, y = age)) + geom_boxplot() + geom_hline(yintercept = q1) + geom_hline(yintercept = q2) + geom_hline(yintercept = q3)

Code
filteredData = mutate(filteredData, age_class = case_when(age > q3 ~ "older",
                                                      age > q2 ~ "old",
                                                      age > q1 ~ "young",
                                                      age > 0 ~ "younger"))

ggplot(filter(filteredData, age_class == "old" | age_class == "young"), aes(x = age_class, y = ssq_full)) + geom_boxplot()

Code
filteredData["age_group"] = cut(filteredData$age, c(16, 22, 30, 38, 46, Inf), c("16-21", "22-29", "30-37", "38-45", "45+"), include.lowest = TRUE)

ggplot(filteredData, aes(x = VRexperience, y = ssq_full, fill = VRexperience)) + stat_summary(fun = "mean", geom = "bar", position = "dodge")

Code
library(lubridate)

class(filteredData$Date[1])
[1] "POSIXct" "POSIXt" 
Code
d = filteredData$Date[1] |> as.POSIXct()
year(d) # used chatgpt for this because there is no known website in the universe that has this information for SOME REASON
[1] 2021
Code
ggplot(filteredData, aes(x = generation, y = ssq_full)) + geom_boxplot()

Code
ggplot(filteredData, aes(x = age, y = ssq_full, color = generation)) + geom_point()

Professional Standard of Report

Acknowledgements

References